License

The R markdown code that generated this document is licensed under an MIT software license.

The document itself, and all graphs, tables, figures and text within it, is licensed under a Creative Commons Attribution 4.0 International license (CC-BY).

Reading in and merging the data.

# set working directory, load libraries, set-up
#wd<-"/Users/marc/work/MLW_LSTM/CRYPTOFAZ"
#setwd(wd)

outDir<-paste(sep="","../RESULTS/",gsub(pattern="-",replacement="",Sys.Date()))
inDir<-"../../DATA/20190820"
inDir2<-"../../DATA/20190815"
inDir3<-"../../DATA/20200401"
inDir4<-"../20201023_PreScreenData"
inDir5<-"../20200804_DummyPID"
if(!dir.exists(outDir)){dir.create(outDir)}

elisaFile1<-paste(sep="/",inDir,"CRYPTOFAZ ELISA DATA 02_01MAY2019.csv")
elisaFile2<-paste(sep="/",inDir,"CRYPTOFAZ ELISA DATA_27AUG2018.csv")
elisaFile3<-paste(sep="/",inDir,"CRYPTOFAZ ELISA DATA3_29APR2019.csv")
pcrFile<-paste(sep="/",inDir2,"pcr_datatransfer.csv")
pcrCtFile<-paste(sep="/",inDir3,"PCRResults-Ct.update.csv")
preScreenFile<-paste(sep="/",inDir4,"RDT-PCR_complete.csv")

dummyPidFile<-paste(sep="/",inDir5,"dummyPID_realPID.csv")

outFileElisa<-paste(sep="",outDir,"/cryptofazElisaNormalised_",Sys.Date(),".csv")
outFileMergedData<-paste(sep="",outDir,"/cryptofazPcrElisaMerged_",Sys.Date(),".csv")

outPrefix<-paste(sep="",outDir,"/cryptofazElisaPcr_",Sys.Date())

#logFile<-paste(sep="",outDir,"/cryptofazLogFile_",gsub(Sys.Date(),pattern="-",replacement=""),".log")
logFile<-""

cat(file=logFile,append=F,paste(sep="","This is cryptofazELISA_normalisation20200318.Rmd.\n\nInput parameters:\n\telisaFile1 = < ",elisaFile1," >,\n\telisaFile2 = < ",elisaFile2," >,\n\telisafile3 = < ",elisaFile3," >,\n\tpcrFile = < ",pcrFile," >,\n\tpcrCtFile = < ",pcrCtFile," >,\n\tpreScreenFile = < ",preScreenFile,">,\n\toutFileElisa = < ",outFileElisa," >,\n\toutFileMergedData = < ",outFileMergedData," >.\n\n"))
## This is cryptofazELISA_normalisation20200318.Rmd.
## 
## Input parameters:
##  elisaFile1 = < ../../DATA/20190820/CRYPTOFAZ ELISA DATA 02_01MAY2019.csv >,
##  elisaFile2 = < ../../DATA/20190820/CRYPTOFAZ ELISA DATA_27AUG2018.csv >,
##  elisafile3 = < ../../DATA/20190820/CRYPTOFAZ ELISA DATA3_29APR2019.csv >,
##  pcrFile = < ../../DATA/20190815/pcr_datatransfer.csv >,
##  pcrCtFile = < ../../DATA/20200401/PCRResults-Ct.update.csv >,
##  preScreenFile = < ../20201023_PreScreenData/RDT-PCR_complete.csv>,
##  outFileElisa = < ../RESULTS/20210111/cryptofazElisaNormalised_2021-01-11.csv >,
##  outFileMergedData = < ../RESULTS/20210111/cryptofazPcrElisaMerged_2021-01-11.csv >.
# read the data
eli1<-read.csv(elisaFile1,stringsAsFactors=F)
eli2<-read.csv(elisaFile2,stringsAsFactors=F)
eli3<-read.csv(elisaFile3,stringsAsFactors=F)
pcr<-read.csv(pcrFile,stringsAsFactors=F)
pcrCt<-read.csv(pcrCtFile,stringsAsFactors=F)
preScreen<-read.csv(preScreenFile,stringsAsFactors=F)
dummyPID<-read.csv(dummyPidFile,stringsAsFactors=F)
cat(file=logFile,append=T,paste(sep="","Reading the data.\n",
                                "..ELISA file 1: ",nrow(eli1)," rows, ",ncol(eli1)," columns \n",
                                "..ELISA file 2: ",nrow(eli2)," rows, ",ncol(eli2)," columns \n",
                                "..ELISA file 3: ",nrow(eli3)," rows, ",ncol(eli3)," columns \n",
                                "..PCR file: ",nrow(pcr)," rows, ",ncol(pcr)," columns \n",
                                "..PCR Ct file: ",nrow(pcrCt)," rows, ",ncol(pcrCt)," columns \n",
                                "..Pre-screening file: ",nrow(preScreen)," rows, ",ncol(preScreen)," columns \n",
                                "..Dummy PID file: ",nrow(dummyPID)," rows, ",ncol(dummyPID)," columns \n",
                                "Done.\n\n"))
## Reading the data.
## ..ELISA file 1: 43 rows, 5 columns 
## ..ELISA file 2: 33 rows, 4 columns 
## ..ELISA file 3: 76 rows, 5 columns 
## ..PCR file: 190 rows, 34 columns 
## ..PCR Ct file: 428 rows, 7 columns 
## ..Pre-screening file: 582 rows, 7 columns 
## ..Dummy PID file: 22 rows, 2 columns 
## Done.
# fix the Ct values (recode 'Undertermined' values)
pcrCt$Ct[pcrCt$Ct=="Undetermined"]<-"40" # check with James and Darwin whether 'Undertermined' should be NA or a very high Ct value
pcrCt$Ct<-as.numeric(pcrCt$Ct)
pcrCt$Time.point<-gsub(pattern=" $",replacement="",pcrCt$Time.point)
pcrCt$Time.point<-gsub(pattern=" ",replacement="_",pcrCt$Time.point)
pcrCt$Time.point<-tolower(pcrCt$Time.point)
pcrCt<-pcrCt[is.element(el=pcrCt$Time.point,set=c("1st_am","only")),]
pcrCt$Lab.Sample.ID<-pcrCt$Sample.ID
pcrCt$Visit[pcrCt$Lab.Sample.ID=="CDE102F1"]<-6 # mnaually correcting the Visit number for this sample; see email from James Nyirenda 10/07/2020 that the visit was incorrectly recorded for this sample as it was run on a different experiment

# remove a pre-screening row with duplicated PATID and NA qPCR.Ct value
preScreen<-preScreen[!(preScreen$PATID=="CFZ0010019" & is.na(preScreen$qPCR.Ct)),]
preScreen$RDT.Result<-tolower(gsub(pattern=" ",replacement="",preScreen$RDT.Result))
preScreen$qPCR.Result<-tolower(gsub(pattern=" ",replacement="",preScreen$qPCR.Result))
preScreen$RDT.Result[preScreen$RDT.Result==""]<-NA
preScreen$qPCR.Result[preScreen$qPCR.Result==""]<-NA
preScreen$qPCR.Result32<-ifelse(preScreen$qPCR.Ct<32,"positive","negative")

# normalise
eli1$OD.norm<-(eli1$Optical.Density-eli1$Optical.Density[eli1$Sample.Lab.ID=="Negative Control"])/(eli1$Optical.Density[eli1$Sample.Lab.ID=="Positive Control"]-eli1$Optical.Density[eli1$Sample.Lab.ID=="Negative Control"])
eli2$OD.norm<-(eli2$OPTICAL.DENSITY-eli2$OPTICAL.DENSITY[eli2$SAMPLE.ID=="Negative Control"])/(eli2$OPTICAL.DENSITY[eli2$SAMPLE.ID=="Positive Control"]-eli2$OPTICAL.DENSITY[eli2$SAMPLE.ID=="Negative Control"])
eli3$OD.norm<-(eli3$Absorbance-eli3$Absorbance[eli3$Sample.ID=="Negative Control"])/(eli3$Absorbance[eli3$Sample.ID=="Positive Control"]-eli3$Absorbance[eli3$Sample.ID=="Negative Control"])
cat(file=logFile,append=T,"Normalising the ELISA OD data.\nDone.\n\n")
## Normalising the ELISA OD data.
## Done.
# combine the data
elisa<-data.frame(
  Participant.ID=c(eli1$Participant.ID,eli2$PARTICIPANT.ID,eli3$Sample.ID.1),
  Sample.ID=c(eli1$Sample.Lab.ID,eli2$SAMPLE.ID,eli3$Sample.ID),
  Visit=c(eli1$Visit,eli2$VISIT,eli3$Visit),
  Time.Point=c(eli1$Time.point,rep(NA,nrow(eli2)),eli3$Time.Point),
  Series=c(rep("S1",nrow(eli1)),rep("S2",nrow(eli2)),rep("S3",nrow(eli3))),
  OD=c(eli1$Optical.Density,eli2$OPTICAL.DENSITY,eli3$Absorbance),
  OD.norm=c(eli1$OD.norm,eli2$OD.norm,eli3$OD.norm)
)
elisa$Participant.ID<-as.character(elisa$Participant.ID)
elisa$Sample.ID<-as.character(elisa$Sample.ID)
elisa[elisa==""]<-NA
elisa[elisa=="N/A"]<-NA
elisa[elisa=="None"]<-NA
elisa$Participant.ID[elisa$Sample.ID=="Positive Control"]<-paste(sep=".","PosCtrl",elisa$Series[elisa$Sample.ID=="Positive Control"])
elisa$Sample.ID[elisa$Sample.ID=="Positive Control"]<-paste(sep=".","PosCtrl",elisa$Series[elisa$Sample.ID=="Positive Control"])
elisa$Participant.ID[elisa$Sample.ID=="Negative Control"]<-paste(sep=".","NegCtrl",elisa$Series[elisa$Sample.ID=="Negative Control"])
elisa$Sample.ID[elisa$Sample.ID=="Negative Control"]<-paste(sep=".","NegCtrl",elisa$Series[elisa$Sample.ID=="Negative Control"])

# Note the positive / negative result is done on the un-normalised ELISA data
# The threshold for dual channel is 0.09 (negative below, positive above), see DATA/20190820/pt5014insert_rev_0806.pdf
elisa$Result<-ifelse(elisa$OD>=0.09,"positive","negative")

elisa$Sample.ID.alt<-paste(sep=".Visit",elisa$Participant.ID,elisa$Visit)
pcr$Sample.ID<-paste(sep=".Visit",pcr$PATID,pcr$Visit)
pcrCt$Sample.ID<-paste(sep=".Visit",pcrCt$Participant.ID,pcrCt$Visit)
allSampIDs<-sort(unique(c(pcr$Sample.ID,pcrCt$Sample.ID,elisa$Sample.ID.Alt)))
dat<-cbind(pcr[match(allSampIDs,pcr$Sample.ID),],pcrCt[match(allSampIDs,pcrCt$Sample.ID),],elisa[match(allSampIDs,elisa$Sample.ID.alt),])
colnames(dat)<-c(paste(sep=".","PCR",colnames(pcr)),paste(sep=".","PCRCT",colnames(pcrCt)),paste(sep=".","ELISA",colnames(elisa)))

dat$PCRCT.Result<-factor(ifelse(dat$PCRCT.Ct<35,"positive","negative"),levels=c("positive","negative"))
dat$PCRCT.Result32<-factor(ifelse(dat$PCRCT.Ct<32,"positive","negative"),levels=c("positive","negative"))

cat(file=logFile,append=T,paste(sep="","Combining the ELISA data from the 3 series.\n",
                                "..output will have ",nrow(elisa)," rows, ",ncol(elisa)," columns.\n\n",
                                "Combining the ELISA, PCR and PCR Ct data.\n",
                                "..the combined data will consist of ",nrow(dat)," rows, ",ncol(dat)," columns.\n\n",
                                "Done.\n\n"))
## Combining the ELISA data from the 3 series.
## ..output will have 152 rows, 9 columns.
## 
## Combining the ELISA, PCR and PCR Ct data.
## ..the combined data will consist of 190 rows, 54 columns.
## 
## Done.
# add the ELISA positive / negative results to the pre-screening data
elisaTmp<-elisa[elisa$Visit=="-1",]
preScreen$ELISA.Result<-elisaTmp$Result[match(preScreen$PATID,elisaTmp$Participant.ID)]
preScreen$ELISA.OD<-elisaTmp$OD[match(preScreen$PATID,elisaTmp$Participant.ID)]
preScreen$ELISA.OD.norm<-elisaTmp$OD[match(preScreen$PATID,elisaTmp$Participant.ID)]
rm(elisaTmp)

# adding dummy PIDs
dat$dummyPID<-dummyPID$PCR.PATID2[match(dat$PCR.PATID,dummyPID$PCR.PATID)]

# writing output
write.csv(elisa,row.names=F,file=outFileElisa)
write.csv(dat,row.names=F,file=outFileMergedData)

cat(file=logFile,append=T,"Writing merged data to disk.\nDone.\n\n")
## Writing merged data to disk.
## Done.

Data summary table

sumStat<-function(dat,var,thr=NULL,thrLowHigh="lt"){
  N<-sum(!is.na(dat[,var]))
  perc<-100*N/nrow(dat)
  med<-median(dat[,var],na.rm=T)
  rng<-range(dat[,var],na.rm=T)
  
  res<-c(
    N,
    paste(sep="","(",format(nsmall=1,round(digits=1,perc)),"%)"),
    format(nsmall=2,round(digits=2,med)),
    paste(sep="","[",paste(collapse=",",format(nsmall=2,round(digits=2,rng))),"]")
  )
  
  if(!is.null(thr)){
    for(t in thr){
      if(thrLowHigh=="lt"){
        n<-sum(!is.na(dat[,var]) & dat[,var]<t)
      }else if(thrLowHigh=="leq"){
        n<-sum(!is.na(dat[,var]) & dat[,var]<=t)
      }else if(thrLowHigh=="gt"){
        n<-sum(!is.na(dat[,var]) & dat[,var]>t)
      }else if(thrLowHigh=="geq"){
        n<-sum(!is.na(dat[,var]) & dat[,var]>=t)
      }
      perc<-paste(sep="","(",format(nsmall=1,round(digits=1,100*n/N)),"%)")
      
      res<-c(res,n,perc)
    }
  }
  
  return(res)
}

tmpCt<-sumStat(dat=dat,var="PCRCT.Ct",thr=c(32,35),thrLowHigh="lt")
tmpLog2<-sumStat(dat=dat,var="PCR.log2_first")
tmpOD<-sumStat(dat=dat[!is.na(dat$ELISA.Visit),],var="ELISA.OD",thr=0.09,thrLowHigh="geq")
tmpODnorm<-sumStat(dat=dat[!is.na(dat$ELISA.Visit),],var="ELISA.OD.norm")

sumTab<-data.frame(
  assay=c(rep("qPCR",8),rep("ELISA",7)),
  measurement=c(rep("Ct",5),rep("log2(oocyst / gram)",3),rep("OD",4),rep("normalised OD",3)),
  groups=c(rep("",3),"Ct<32","Ct<35",rep("",6),"OD >= 0.09",rep("",3)),
  statistic=c("N (%)","Median","Range","n (% out of N)","n (% out of N)","N (%)","Median","Range","N (%)","Median","Range","n (% out of N)","N (%)","Median","Range"),
  value1=c(tmpCt[!grepl(tmpCt,pattern="%")],
          tmpLog2[!grepl(tmpLog2,pattern="%")],
          tmpOD[!grepl(tmpOD,pattern="%")],
          tmpODnorm[!grepl(tmpODnorm,pattern="%")]),
  value2=c(tmpCt[grepl(tmpCt,pattern="%")][1],"","",tmpCt[grepl(tmpCt,pattern="%")][2:3],tmpLog2[grepl(tmpLog2,pattern="%")],"","",tmpOD[grepl(tmpOD,pattern="%")][1],"","",tmpOD[grepl(tmpOD,pattern="%")][2],tmpODnorm[grepl(tmpODnorm,pattern="%")],"","")
)

sumTab<-sumTab[,-1] # first column wasn't needed when using kable_styling; quicker to just get rid of it like this rather than edit the above code

kable(sumTab,caption="Summaries of qPCR and ELISA measurements.",col.names=NULL) %>%
  kable_styling(full_width=F) %>%
  pack_rows(paste(sep="","qPCR (",nrow(dat)," total visits for which qPCR was run)"), 1, 8) %>%
  pack_rows(paste(sep="","ELISA (",sum(!is.na(dat$ELISA.Visit))," total visits for which ELISA was run)"), 9, 15) %>%
  column_spec(1, bold = T) %>%
  collapse_rows(columns = 1, valign = "top")
Summaries of qPCR and ELISA measurements.
qPCR (190 total visits for which qPCR was run)
Ct N (%) 180 (94.7%)
Median 29.10
Range [21.24,40.00]
Ct<32 n (% out of N) 147 (81.7%)
Ct<35 n (% out of N) 168 (93.3%)
log2(oocyst / gram) N (%) 180 (94.7%)
Median 14.03
Range [ 3.82,22.34]
ELISA (141 total visits for which ELISA was run)
OD N (%) 141 (100.0%)
Median 0.00
Range [0.00,3.07]
OD >= 0.09 n (% out of N) 43 (30.5%)
normalised OD N (%) 141 (100.0%)
Median 0.00
Range [0.00,2.24]

Summary 2x2 table

Summarising the data in a 2x2 table (note this is only for summary; any usual contingency test based on this table will not be valid due to multiple observations per patient):

Ct threshold of 35

tt<-table(dat$PCRCT.Result[!is.na(dat$ELISA.Visit)],dat$ELISA.Result[!is.na(dat$ELISA.Visit)],useNA="always")
tt<-data.frame(var1="qPCR",qPCR=rownames(tt),ELISAneg=tt[,"negative"],ELISApos=tt[,"positive"],ELISAna=tt[,3],row.names = NULL)
tt<-tt[,c(1:2,4:3,5)]

kable(tt,caption="",row.names=F,col.names=c("","","positive","negative","NA")) %>%
  kable_styling(full_width = F) %>%
  add_header_above(c(" " = 2, "ELISA" = 3)) %>%
  column_spec(2, bold = T) %>%
  collapse_rows(columns = 1, valign = "middle")
ELISA
positive negative NA
qPCR positive 43 89 0
negative 0 9 0
NA 0 0 0
write.csv(tt,file=paste(sep="",outPrefix,"_Table2_2x2_qPCR_ELISA_thrCt35.csv"))

Ct threshold of 32

tt<-table(dat$PCRCT.Result32[!is.na(dat$ELISA.Visit)],dat$ELISA.Result[!is.na(dat$ELISA.Visit)],useNA="always")
tt<-data.frame(var1="qPCR",qPCR=rownames(tt),ELISAneg=tt[,"negative"],ELISApos=tt[,"positive"],ELISAna=tt[,3],row.names = NULL)
tt<-tt[,c(1:2,4:3,5)]

kable(tt,caption="",row.names=F,col.names=c("","","positive","negative","NA")) %>%
  kable_styling(full_width = F) %>%
  add_header_above(c(" " = 2, "ELISA" = 3)) %>%
  column_spec(2, bold = T) %>%
  collapse_rows(columns = 1, valign = "middle")
ELISA
positive negative NA
qPCR positive 41 75 0
negative 2 23 0
NA 0 0 0

Simple plots.

qPCR log2 oocyst per gram vs. ELISA normalised OD (first stool of the day).

# PCR log2(oocyst per gram in first stool) vs. ELISA normalised OD
g<-ggplot(data=dat,mapping=aes(x=PCR.log2_first,y=ELISA.OD.norm,col=PCR.treatment,group=PCR.PATID)) +
  geom_point(cex=3) + 
  geom_line(lwd=0.2,lty=2,data=dat[!is.na(dat$PCR.log2_first) & !is.na(dat$ELISA.OD.norm),]) +
  scale_color_manual(values=c("steelblue","orange"))
print(g)

Time / spaghetti plots for log2 oocyst per gram and ELISA normalised OD.

# profiles over time for log2(oocyst per gram in first stool) and normalised OD
g1<-ggplot(data=dat,mapping=aes(x=PCR.Visit,y=PCR.log2_first,col=PCR.treatment,pch=PCR.PATID,group=PCR.PATID)) +
  geom_point() + 
  geom_line(data=dat[!is.na(dat$PCR.log2_first),]) +
  scale_shape_manual(values=seq(0,22)) +
  scale_color_manual(values=c("steelblue","orange")) +
  theme(legend.position="top")

datTmp<-dat[!is.na(dat$ELISA.OD.norm),]
g2<-ggplot(data=datTmp,mapping=aes(x=ELISA.Visit,y=ELISA.OD.norm,col=PCR.treatment,pch=ELISA.Participant.ID,group=ELISA.Participant.ID)) +
  geom_point() + 
  geom_line(na.rm=T) +
  scale_shape_manual(values=seq(0,22))+
  scale_color_manual(values=c("steelblue","orange")) +
  theme(legend.position="top")

grid.arrange(g1,g2,ncol=2)

Arithmetic mean and 95% confidence intervals over time per treatment group for qPCR log2 oocyst per gram and ELISA normalised OD.

plotMeanCIs<-function(datCFZ,datPLCB,varName,mTitle,logScale=F,xLabel="Study Day",yLabel,yLimits,days=c(-1,1:6,"19-24","41-55"),daysAlt=c(-1,1:8),lwd=1,cexCust=1){
  tmpMat<-matrix(nrow=4,ncol=length(days))
  if(!logScale){
    rownames(tmpMat)<-c("CFZ_mean","Placebo_mean","CFZ_SE","Placebo_SE")
  }else{
    rownames(tmpMat)<-c("CFZ_mean","Placebo_mean","CFZ_SElog","Placebo_SElog")
  }
  colnames(tmpMat)<-c(paste(sep="","day",days))
  
  for(j in 1:length(days)){
    if(!logScale){
      tmpMat["CFZ_mean",j]<-mean(datCFZ[datCFZ$PCR.Visit==daysAlt[j],varName],na.rm=T)
      tmpMat["CFZ_SE",j]<-sd(datCFZ[datCFZ$PCR.Visit==daysAlt[j],varName],na.rm=T)/sqrt(sum(!is.na(datCFZ[datCFZ$PCR.Visit==daysAlt[j],varName])))
      tmpMat["Placebo_mean",j]<-mean(datPLCB[datPLCB$PCR.Visit==daysAlt[j],varName],na.rm=T)
      tmpMat["Placebo_SE",j]<-sd(datPLCB[datPLCB$PCR.Visit==daysAlt[j],varName],na.rm=T)/sqrt(sum(!is.na(datPLCB[datPLCB$PCR.Visit==daysAlt[j],varName])))
    }else{
      tmpMat["CFZ_mean",j]<-exp(mean(log(datCFZ[datCFZ$PCR.Visit==daysAlt[j],varName]),na.rm=T))
      tmpMat["CFZ_SElog",j]<-sd(log(datCFZ[datCFZ$PCR.Visit==daysAlt[j],varName]),na.rm=T)/sqrt(sum(!is.na(datCFZ[datCFZ$PCR.Visit==daysAlt[j],varName])))
      tmpMat["Placebo_mean",j]<-exp(mean(log(datPLCB[datPLCB$PCR.Visit==daysAlt[j],varName]),na.rm=T))
      tmpMat["Placebo_SElog",j]<-sd(log(datPLCB[datPLCB$PCR.Visit==daysAlt[j],varName]),na.rm=T)/sqrt(sum(!is.na(datPLCB[datPLCB$PCR.Visit==daysAlt[j],varName])))
    }
  }
  
  par(mar=c(5,5,3,0.5))
  plot(type="n",c(1,length(days)),yLimits,xlab=xLabel,ylab=yLabel,main=mTitle,axes=F,cex=cexCust,cex.axis=cexCust,cex.lab=cexCust,cex.main=cexCust)
  points(lty=2,col="steelblue",1:length(days),tmpMat["CFZ_mean",],cex=cexCust*1.5)
  idxNoNA<-sort(which(!is.na(tmpMat["CFZ_mean",])))
  lines(lty=2,col="steelblue",(1:length(days))[idxNoNA],tmpMat["CFZ_mean",idxNoNA],lwd=lwd)
  points(lty=2,col="orange",1:length(days)+0.1,tmpMat["Placebo_mean",],pch=22,cex=cexCust*1.5)
  idxNoNA<-sort(which(!is.na(tmpMat["Placebo_mean",])))
  lines(lty=2,col="orange",(1:length(days))[idxNoNA]+0.1,tmpMat["Placebo_mean",idxNoNA],lwd=lwd)
  for(j in 1:length(days)){
    qCfz<-qt(0.975,df=sum(!is.na(datCFZ[datCFZ$PCR.Visit==daysAlt[j],varName]))-1)
    qPlcb<-qt(0.975,df=sum(!is.na(datPLCB[datPLCB$PCR.Visit==daysAlt[j],varName]))-1)
    if(!logScale){
      segments(j,tmpMat["CFZ_mean",j],j,tmpMat["CFZ_mean",j]+qCfz*tmpMat["CFZ_SE",j],col="steelblue",lwd=lwd)
      segments(j,tmpMat["CFZ_mean",j],j,tmpMat["CFZ_mean",j]-qCfz*tmpMat["CFZ_SE",j],col="steelblue",lwd=lwd)
      segments(j-0.08,tmpMat["CFZ_mean",j]+qCfz*tmpMat["CFZ_SE",j],j+0.08,tmpMat["CFZ_mean",j]+qCfz*tmpMat["CFZ_SE",j],col="steelblue",lwd=lwd)
      segments(j-0.08,tmpMat["CFZ_mean",j]-qCfz*tmpMat["CFZ_SE",j],j+0.08,tmpMat["CFZ_mean",j]-qCfz*tmpMat["CFZ_SE",j],col="steelblue",lwd=lwd)
      segments(j+0.1,tmpMat["Placebo_mean",j],j+0.1,tmpMat["Placebo_mean",j]+qPlcb*tmpMat["Placebo_SE",j],col="orange",lwd=lwd)
      segments(j+0.1,tmpMat["Placebo_mean",j],j+0.1,tmpMat["Placebo_mean",j]-qPlcb*tmpMat["Placebo_SE",j],col="orange",lwd=lwd)
      segments(j+0.1-0.08,tmpMat["Placebo_mean",j]+qPlcb*tmpMat["Placebo_SE",j],j+0.1+0.08,tmpMat["Placebo_mean",j]+qPlcb*tmpMat["Placebo_SE",j],col="orange",lwd=lwd)
      segments(j+0.1-0.08,tmpMat["Placebo_mean",j]-qPlcb*tmpMat["Placebo_SE",j],j+0.1+0.08,tmpMat["Placebo_mean",j]-qPlcb*tmpMat["Placebo_SE",j],col="orange",lwd=lwd)
    }else{
      segments(j,tmpMat["CFZ_mean",j],j,exp(log(tmpMat["CFZ_mean",j])+qCfz*tmpMat["CFZ_SElog",j]),col="steelblue",lwd=lwd)
      segments(j,tmpMat["CFZ_mean",j],j,exp(log(tmpMat["CFZ_mean",j])-qCfz*tmpMat["CFZ_SElog",j]),col="steelblue",lwd=lwd)
      segments(j-0.08,exp(log(tmpMat["CFZ_mean",j])+qCfz*tmpMat["CFZ_SElog",j]),j+0.08,exp(log(tmpMat["CFZ_mean",j])+qCfz*tmpMat["CFZ_SElog",j]),col="steelblue",lwd=lwd)
      segments(j-0.08,exp(log(tmpMat["CFZ_mean",j])-qCfz*tmpMat["CFZ_SElog",j]),j+0.08,exp(log(tmpMat["CFZ_mean",j])-qCfz*tmpMat["CFZ_SElog",j]),col="steelblue",lwd=lwd)
      segments(j+0.1,tmpMat["Placebo_mean",j],j+0.1,exp(log(tmpMat["Placebo_mean",j])+qPlcb*tmpMat["Placebo_SElog",j]),col="orange",lwd=lwd)
      segments(j+0.1,tmpMat["Placebo_mean",j],j+0.1,exp(log(tmpMat["Placebo_mean",j])-qPlcb*tmpMat["Placebo_SElog",j]),col="orange",lwd=lwd)
      segments(j+0.1-0.08,exp(log(tmpMat["Placebo_mean",j])+qPlcb*tmpMat["Placebo_SElog",j]),j+0.1+0.08,exp(log(tmpMat["Placebo_mean",j])+qPlcb*tmpMat["Placebo_SElog",j]),col="orange",lwd=lwd)
      segments(j+0.1-0.08,exp(log(tmpMat["Placebo_mean",j])-qPlcb*tmpMat["Placebo_SElog",j]),j+0.1+0.08,exp(log(tmpMat["Placebo_mean",j])-qPlcb*tmpMat["Placebo_SElog",j]),col="orange",lwd=lwd)
    }
    
  }
  axis(side=1,at=1:length(days),labels=as.character(days),cex.lab=cexCust,cex.axis=cexCust,padj=0.25)
  axis(side=2,cex.lab=cexCust,cex.axis=cexCust)
  box()
  legend(x="topright",col=c("steelblue","orange"),legend=c("CFZ","Placebo"),lty=1,lwd=lwd,pch=c(1,22),horiz=T,pt.cex=cexCust*1.5,cex=cexCust,bty="n",)
}

plotMeanCIs(datCFZ=dat[dat$PCR.treatment=="CFZ",],datPLCB=dat[dat$PCR.treatment=="Placebo",],varName="PCR.log2_first",mTitle="Mean log2 oocyst per gram over time.",yLabel="log2(oocyst per gram)",yLimits=range(dat$PCR.log2_first,na.rm=T),cexCust=2.25,lwd=4)

plotMeanCIs(datCFZ=dat[dat$PCR.treatment=="CFZ",],datPLCB=dat[dat$PCR.treatment=="Placebo",],varName="ELISA.OD.norm",mTitle="Mean normalised ELISA optical density over time.",yLabel="Normalised OD",yLimits=c(-0.25,1),cexCust=2.25,lwd=4) # can't work on the log scale as ELISA data has actual zeroes in it

png(file=paste(sep="",outPrefix,"_Fig_ELISA-OD-OverTime.png"),height=9,width=16,units="in",res=450)
plotMeanCIs(datCFZ=dat[dat$PCR.treatment=="CFZ",],datPLCB=dat[dat$PCR.treatment=="Placebo",],varName="ELISA.OD.norm",mTitle="Mean normalised ELISA optical density over time.",yLabel="Normalised OD",yLimits=c(-0.25,1),cexCust=2.25,lwd=4)
dev.off()
## quartz_off_screen 
##                 2
dat$PCR.log2_first.cfb<-dat$PCR.log2_first
dat$ELISA.OD.norm.cfb<-dat$ELISA.OD.norm

for(pid in unique(dat$PCR.PATID)){
  baseLog2<-ifelse(!is.na(dat$PCR.log2_first[dat$PCR.PATID==pid & dat$PCR.Visit==1]),(dat$PCR.log2_first[dat$PCR.PATID==pid & dat$PCR.Visit==-1]+dat$PCR.log2_first[dat$PCR.PATID==pid & dat$PCR.Visit==1])/2,dat$PCR.log2_first[dat$PCR.PATID==pid & dat$PCR.Visit==-1])
  baseOD<-ifelse(!is.na(dat$ELISA.OD.norm[dat$PCR.PATID==pid & !is.na(dat$ELISA.Visit) & dat$ELISA.Visit==1]),(dat$ELISA.OD.norm[dat$PCR.PATID==pid & !is.na(dat$ELISA.Visit) & dat$ELISA.Visit==-1]+dat$ELISA.OD.norm[dat$PCR.PATID==pid & !is.na(dat$ELISA.Visit) & dat$ELISA.Visit==1])/2,dat$ELISA.OD.norm[dat$PCR.PATID==pid & !is.na(dat$ELISA.Visit) & dat$ELISA.Visit==-1])
  dat$PCR.log2_first.cfb[dat$PCR.PATID==pid]<-dat$PCR.log2_first[dat$PCR.PATID==pid]-baseLog2
  dat$ELISA.OD.norm.cfb[dat$PCR.PATID==pid]<-dat$ELISA.OD.norm[dat$PCR.PATID==pid]-baseOD
}

plotMeanCIs(datCFZ=dat[dat$PCR.Visit>=2 & dat$PCR.treatment=="CFZ",],datPLCB=dat[dat$PCR.Visit>=2 & dat$PCR.treatment=="Placebo",],varName="PCR.log2_first.cfb",mTitle="Mean log2 oocyst per gram over time (change from baseline).",yLabel="CFB log2(oocyst per gram)",yLimits=c(-7,5),days=c(2:6,"19-24","41-55"),daysAlt=2:8,cexCust=2.25,lwd=4)

plotMeanCIs(datCFZ=dat[dat$PCR.Visit>=2 & dat$PCR.treatment=="CFZ",],datPLCB=dat[dat$PCR.Visit>=2 & dat$PCR.treatment=="Placebo",],varName="ELISA.OD.norm.cfb",mTitle="Mean CFB normalised ELISA optical density over time (change from baseline).",yLabel=" CFB Normalised OD",yLimits=c(-0.5,0.9),days=c(2:6,"19-24","41-55"),daysAlt=2:8,cexCust=2.25,lwd=4) 

Proportions of positive / negative results by ELISA and qPCR over time

datProp<-dat %>%
  group_by(PCR.Visit,PCR.treatment) %>%
  summarise(
    PCR=sum(!is.na(PCRCT.Result) & PCRCT.Result=="positive")/sum(!is.na(PCRCT.Result)),
    ELISA=sum(!is.na(ELISA.Result) & ELISA.Result=="positive")/sum(!is.na(ELISA.Result)),
    CountPCR=sum(!is.na(PCRCT.Result) & PCRCT.Result=="positive"),
    TotalPCR=sum(!is.na(PCRCT.Result)),
    CountELISA=sum(!is.na(ELISA.Result) & ELISA.Result=="positive"),
    TotalELISA=sum(!is.na(ELISA.Result))
  ) %>%
  add_column(lowPCR=NA,highPCR=NA,lowELISA=NA,highELISA=NA)

for(j in 1:nrow(datProp)){
  datProp$lowPCR[j]<-binom.test(x=datProp$CountPCR[j],n=datProp$TotalPCR[j])$conf.int[1]
  datProp$highPCR[j]<-binom.test(x=datProp$CountPCR[j],n=datProp$TotalPCR[j])$conf.int[2]
  if(datProp$TotalELISA[j]>0){datProp$lowELISA[j]<-binom.test(x=datProp$CountELISA[j],n=datProp$TotalELISA[j])$conf.int[1]}
  if(datProp$TotalELISA[j]>0){datProp$highELISA[j]<-binom.test(x=datProp$CountELISA[j],n=datProp$TotalELISA[j])$conf.int[2]}
}

datPropEst<-datProp %>%
  dplyr::select(c(PCR.Visit,PCR.treatment,PCR,ELISA)) %>%
  pivot_longer(cols=c(PCR,ELISA),names_to="Diagnostic",values_to="PropPos")

datPropLow<-datProp %>%
  dplyr::select(c(PCR.Visit,PCR.treatment,lowPCR,lowELISA)) %>%
  pivot_longer(cols=c(lowPCR,lowELISA),names_to="Diagnostic",values_to="PropPosLow")

datPropHigh<-datProp %>%
  dplyr::select(c(PCR.Visit,PCR.treatment,highPCR,highELISA)) %>%
  pivot_longer(cols=c(highPCR,highELISA),names_to="Diagnostic",values_to="PropPosHigh")

datProp<-data.frame(datPropEst,PropPosLow=datPropLow$PropPosLow,PropPosHigh=datPropHigh$PropPosHigh)

g1<-datProp %>%
  filter(Diagnostic=="PCR") %>%
  ggplot(mapping=aes(x=factor(PCR.Visit),y=PropPos,fill=PCR.treatment,col=PCR.treatment)) +
  geom_bar(stat="identity", position=position_dodge()) +
  geom_errorbar(mapping=aes(x=factor(PCR.Visit),ymin=PropPosLow,ymax=PropPosHigh),stat="identity", position=position_dodge(width=0.9),col="darkgrey",width=0.25) + 
  scale_fill_manual(values=c("steelblue","orange"),name="Treatment") +
  scale_colour_manual(values=c("steelblue","orange"),name="Treatment") +
  xlab("Study day") +
  ylab("Positive test proportion") +
  ggtitle("qPCR") +
  ylim(c(0,1)) +
  theme_light() +
  theme(text=element_text(size=20))

g2<-datProp %>%
  filter(Diagnostic=="ELISA") %>%
  ggplot(mapping=aes(x=factor(PCR.Visit),y=PropPos,fill=PCR.treatment,col=PCR.treatment)) +
  geom_bar(stat="identity", position=position_dodge()) +
  geom_errorbar(mapping=aes(x=factor(PCR.Visit),ymin=PropPosLow,ymax=PropPosHigh),stat="identity", position=position_dodge(width=0.9),col="darkgrey",width=0.25) + 
  scale_fill_manual(values=c("steelblue","orange"),name="Treatment") +
  scale_colour_manual(values=c("steelblue","orange"),name="Treatment") +
  xlab("Study day") +
  ylab("Positive test proportion") +
  ggtitle("ELISA") +
  ylim(c(0,1)) +
  theme_light() +
  theme(text=element_text(size=20))
  
grid.arrange(g1,g2,nrow=2)

png(file=paste(sep="",outPrefix,"_Fig_PositivityPropsOverTime.png"),height=14,width=16,units="in",res=450)
grid.arrange(g1,g2,nrow=2)
dev.off()
## quartz_off_screen 
##                 2

FIGURE 1: Box & jitter plots comparing qPCR Ct values according to ELISA positivity and treatment group.

Ct threshold of 35

# clustered rank-sum test
datTmp<-dat %>% filter(!is.na(ELISA.Result) & dat$PCRCT.Ct<40) %>% mutate(group=ifelse(ELISA.Result=="positive",1,0))
clusranksum<-clusWilcox.test(PCRCT.Ct~group+cluster(as.integer(factor(PCR.PATID))),data=datTmp,method="ds")
pvalClusRankSum<-clusranksum$p.value

# mixed model
lmmRes<-lme(PCRCT.Ct~factor(ELISA.Result),random=~1|PCR.PATID,correlation=corCompSymm(form=~1|PCR.PATID),data=datTmp)
pvalMixed<-summary(lmmRes)$tTable[2,"p-value"]

# mixed censored regression
pDat<-dat %>%
  filter(!is.na(ELISA.Result)) %>%
  mutate(group=ifelse(ELISA.Result=="positive",1,0)) %>%
  pdata.frame(index=c("PCR.PATID","PCR.Visit"))

censRegMod<-censReg(PCRCT.Ct~group,data=pDat,right=40,method="BHHH",nGHQ=128) # https://cran.r-project.org/web/packages/censReg/vignettes/censReg.pdf

pvalMixedCensReg<-summary(censRegMod)$estimate["group","Pr(> t)"]

g<-dat %>%
  filter(!is.na(ELISA.Result) & dat$PCRCT.Ct<=40) %>%
  mutate(pcrElisa=factor(case_when(
    ELISA.Result=="positive" & PCRCT.Result=="positive"~"PcrPosElisaPos",
    ELISA.Result=="positive" & PCRCT.Result=="negative"~"PcrNegElisaPos",
    ELISA.Result=="negative" & PCRCT.Result=="positive"~"PcrPosElisaNeg",
    ELISA.Result=="negative" & PCRCT.Result=="negative"~"PcrNegElisaNeg"
  ),levels=c("PcrNegElisaNeg","PcrPosElisaNeg","PcrPosElisaPos"))) %>%
  ggplot(aes(x=ELISA.Result, y=PCRCT.Ct, color=pcrElisa)) + 
  geom_boxplot(col="darkgrey",outlier.size=0,outlier.shape=NA, fill="grey95") +
  geom_jitter(position=position_jitter(height=0,width=0.2),size=3) +
  scale_color_manual(values=c("steelblue4","steelblue1","orange1"),labels=c("PCR(-) ELISA(-)","PCR(+) ELISA(-)","PCR(+) ELISA(+)"),name="") +
  xlab("ELISA") + 
  ylab("qPCR Ct") +
  ylim(c(20.5,40.75)) +
  theme_light() +
  theme(text=element_text(size=20),legend.position="bottom") +
  geom_abline(slope=0,intercept=35,lty=2,col="grey35",lwd=1.25) +
  ggtitle("qPCR Ct values according to ELISA positivity.") +
  geom_text(mapping=aes(x=1.5,y=40.75,label=paste(sep="","p = ",format(nsmall=4,round(digits=4,pvalMixedCensReg)))),size=8,col="grey35") +
  geom_text(mapping=aes(x=0.55,y=35.5,label="Ct=35"),size=6,col="grey35")

print(g)

png(file=paste(sep="",outPrefix,"_Fig1_BoxJitter_Ct35.png"),height=12,width=10,units="in",res=450)
print(g)
dev.off()
## quartz_off_screen 
##                 2
sumStat<-dat %>%
  filter(!is.na(ELISA.Result) & dat$PCRCT.Ct<=40) %>%
  group_by(ELISA.Result) %>%
  summarise(median=format(nsmall=2,round(digits=2,median(PCRCT.Ct))),IQR=paste(sep="","(",paste(collapse=",",format(nsmall=2,round(digits=2,quantile(PCRCT.Ct,probs=c(0.25,0.75))))),")"))

print(sumStat)
## # A tibble: 2 x 3
##   ELISA.Result median IQR          
##   <chr>        <chr>  <chr>        
## 1 negative     29.75  (27.61,31.79)
## 2 positive     26.26  (23.91,28.51)

Ct threshold of 32

# clustered rank-sum test
datTmp<-dat %>% filter(!is.na(ELISA.Result) & dat$PCRCT.Ct<40) %>% mutate(group=ifelse(ELISA.Result=="positive",1,0))
clusranksum<-clusWilcox.test(PCRCT.Ct~group+cluster(as.integer(factor(PCR.PATID))),data=datTmp,method="ds")
pvalClusRankSum<-clusranksum$p.value

# mixed model
lmmRes<-lme(PCRCT.Ct~factor(ELISA.Result),random=~1|PCR.PATID,correlation=corCompSymm(form=~1|PCR.PATID),data=datTmp)
pvalMixed<-summary(lmmRes)$tTable[2,"p-value"]

# mixed censored regression
pDat<-dat %>%
  filter(!is.na(ELISA.Result)) %>%
  mutate(group=ifelse(ELISA.Result=="positive",1,0)) %>%
  pdata.frame(index=c("PCR.PATID","PCR.Visit"))

censRegMod<-censReg(PCRCT.Ct~group,data=pDat,right=40,method="BHHH",nGHQ=128) # https://cran.r-project.org/web/packages/censReg/vignettes/censReg.pdf

pvalMixedCensReg<-summary(censRegMod)$estimate["group","Pr(> t)"]

dat %>%
  filter(!is.na(ELISA.Result) & dat$PCRCT.Ct<=40) %>%
  mutate(pcrElisa=factor(case_when(
    ELISA.Result=="positive" & PCRCT.Result32=="positive"~"PcrPosElisaPos",
    ELISA.Result=="positive" & PCRCT.Result32=="negative"~"PcrNegElisaPos",
    ELISA.Result=="negative" & PCRCT.Result32=="positive"~"PcrPosElisaNeg",
    ELISA.Result=="negative" & PCRCT.Result32=="negative"~"PcrNegElisaNeg"
  ),levels=c("PcrNegElisaNeg","PcrPosElisaNeg","PcrNegElisaPos","PcrPosElisaPos"))) %>%
  ggplot(aes(x=ELISA.Result, y=PCRCT.Ct, color=pcrElisa)) + 
  geom_boxplot(col="darkgrey",outlier.size=0,outlier.shape=NA, fill="grey95") +
  geom_jitter(position=position_jitter(height=0,width=0.2),size=3) +
  scale_color_manual(values=c("steelblue4","steelblue1","orange4","orange1"),labels=c("PCR(-) ELISA(-)","PCR(+) ELISA(-)","PCR(-) ELISA(+)","PCR(+) ELISA(+)"),name="") +
  xlab("ELISA") + 
  ylab("qPCR Ct") +
  ylim(c(20.5,40.75)) +
  theme_light() +
  theme(text=element_text(size=20),legend.position="bottom") +
  geom_abline(slope=0,intercept=32,lty=2,col="grey35",lwd=1.25) +
  ggtitle("qPCR Ct values according to ELISA positivity.") +
  geom_text(mapping=aes(x=1.5,y=40.75,label=paste(sep="","p = ",format(nsmall=4,round(digits=4,pvalMixedCensReg)))),size=8,col="grey35") +
  geom_text(mapping=aes(x=0.55,y=32.5,label="Ct=32"),size=6,col="grey35")

print(g)

png(file=paste(sep="",outPrefix,"_Fig1_BoxJitter_Ct32.png"),height=12,width=10,units="in",res=450)
print(g)
dev.off()
## quartz_off_screen 
##                 2

The p-value reported on the figures above is from a mixed censored regression model and is the p-value testing the null hypothesis that the fixed effect coefficient for the grouping variable (ELISA positivity) is zero. This was implemented using the function censReg() from the censReg package in R (Henningsen, A., censReg: Censored Regression (Tobit) Models, v0.5-30, https://CRAN.R-project.org/package=censReg, 2019).

SUPPL. FIGURE 1: qPCR and ELISA positivity over time per patient.

qPCR Ct threshold of 35

plotqPCRELISATimeHeatmap<-function(plotDat,pid,xlab="",ylab="",ctThr=35){
  plotDat<-plotDat %>% 
    mutate(PCR.log2_first.rs = scales::rescale(PCR.log2_first,to=c(0.2,1)),
           ELISA.OD.norm.rs = scales::rescale(ELISA.OD.norm,to=c(0.2,1))) %>%
    filter(PCR.PATID==pid) %>%
    dplyr::select(PCR.PATID,PCR.Visit,PCR.treatment,PCRCT.Ct,PCRCT.Result,PCR.log2_first.rs,ELISA.OD.norm.rs,ELISA.Result)
  
  plotDat$PCRCT.Result<-factor(ifelse(plotDat$PCRCT.Ct<ctThr,"P","N"),levels=c("P","N"))
  plotDat$ELISA.Result<-factor(ifelse(plotDat$ELISA.Result=="positive","P","N"),levels=c("P","N"))
  
  plotDat$PCR.Visit<-factor(plotDat$PCR.Visit,levels=c("-1","1","2","3","4","5","6","7","8"))
  levels(plotDat$PCR.Visit)[levels(plotDat$PCR.Visit)=="7"]<-"19-24"
  levels(plotDat$PCR.Visit)[levels(plotDat$PCR.Visit)=="8"]<-"41-55"
  
  plotDatWide1<-plotDat %>%
    dplyr::select(PCR.PATID,PCR.Visit,PCR.treatment,PCR.log2_first.rs,ELISA.OD.norm.rs) %>%
    pivot_longer(cols=c("PCR.log2_first.rs","ELISA.OD.norm.rs"),names_to="assay")
  plotDatWide1$assay<-factor(ifelse(plotDatWide1$assay=="PCR.log2_first.rs","qPCR","ELISA"))
  
  plotDatWide2<-plotDat %>%
    dplyr::select(PCR.PATID,PCR.Visit,PCR.treatment,PCRCT.Result,ELISA.Result) %>%
    pivot_longer(cols=c("PCRCT.Result","ELISA.Result"),names_to="assay")
  plotDatWide2$assay<-factor(ifelse(plotDatWide2$assay=="PCRCT.Result","qPCR","ELISA"))
  
  plotDatWide<-plotDatWide1
  plotDatWide$positivity<-plotDatWide2$value
  
  g1<-plotDatWide %>%
    group_by(assay) %>%
    complete(PCR.Visit) %>%
    ggplot(mapping=aes(y=assay,x=PCR.Visit,alpha=value,fill=assay)) +
    geom_tile(aes(alpha = value), color = "white") +
    scale_fill_manual(values=c("steelblue","orange"),drop=FALSE,na.value="gray") +
    coord_equal() +
    scale_alpha(na.value=0) +
    geom_text(mapping=aes(x=PCR.Visit,y=assay,label=positivity,alpha=1)) +
    theme(legend.position="none",
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          plot.margin = unit(c(0.1,0.1,0.1,0), "pt"),
          axis.line=element_blank(),
          axis.text.x=element_blank(),
          axis.text.y=element_blank(),
          axis.ticks=element_blank(),
          axis.title.x=element_blank(),
          axis.title.y=element_blank()) +
    xlab(xlab) +
    ylab(ylab)
  
  datAnnot<-data.frame(
    x=rep(c(3,7,10),2),
    y=c(rep(2,3),rep(1,3)),
    label=c(pid,unique(plotDat$PCR.treatment[plotDat$PCR.PATID==pid & !is.na(plotDat$PCR.treatment)]),"qPCR","","","ELISA")
  )
  
  g2<-datAnnot %>%
    ggplot(mapping=aes(y=y,x=x)) +
    geom_tile(color = "white",alpha=0) +
    geom_text(mapping=aes(label=label)) +
    theme(legend.position="none",
          panel.grid.major = element_blank(),
          panel.grid.minor = element_blank(),
          plot.margin = unit(c(0.1,0,0.1,0.1), "pt"),
          panel.background = element_blank(),
          axis.line=element_blank(),
          axis.text.x=element_blank(),
          axis.text.y=element_blank(),
          axis.ticks=element_blank(),
          axis.title.x=element_blank(),
          axis.title.y=element_blank()) +
    coord_equal()
  
  g<-plot_grid(g2,g1, align = "h", nrow = 1, rel_heights = rep(1,2))
  
  return(g)
}

g<-list()

datTmp <- dat %>%
  filter(dat$PCR.treatment=="CFZ")
datTmp$PCR.PATID<-factor(paste(sep="","CFZ",formatC(width = 2, format = "d", flag = "0",as.integer(factor(datTmp$PCR.PATID)))))

for(i in 1:length(levels(datTmp$PCR.PATID))){
  pid<-levels(datTmp$PCR.PATID)[i]
  g[[pid]]<-plotqPCRELISATimeHeatmap(datTmp,pid,ctThr=35)
}

datTmp <- dat %>%
  filter(dat$PCR.treatment=="Placebo")
#datTmp$PCR.PATID<-factor(paste(sep="","PCB",formatC(width = 2, format = "d", flag = "0",as.integer(factor(datTmp$PCR.PATID)))))
datTmp$PCR.PATID<-factor(datTmp$dummyPID)

for(i in 1:length(levels(datTmp$PCR.PATID))){
  pid<-levels(datTmp$PCR.PATID)[i]
  g[[pid]]<-plotqPCRELISATimeHeatmap(datTmp,pid,ctThr=35)
}

grid.arrange(grobs=g,nrow=12,as.table=F,top=textGrob("Threshold Ct = 35", gp=gpar(fontsize=18)))

png(file=paste(sep="",outPrefix,"_SupFig1_HeatMap_Ct35.png"),height=12,width=12,units="in",res=450)
grid.arrange(grobs=g,nrow=12,as.table=F,top=textGrob("Threshold Ct = 35", gp=gpar(fontsize=18)))
dev.off()
## quartz_off_screen 
##                 2

qPCR Ct threshold of 32

g<-list()

datTmp <- dat %>%
  filter(dat$PCR.treatment=="CFZ")
datTmp$PCR.PATID<-factor(paste(sep="","CFZ",formatC(width = 2, format = "d", flag = "0",as.integer(factor(datTmp$PCR.PATID)))))

for(i in 1:length(levels(datTmp$PCR.PATID))){
  pid<-levels(datTmp$PCR.PATID)[i]
  g[[pid]]<-plotqPCRELISATimeHeatmap(datTmp,pid,ctThr=32)
}

datTmp <- dat %>%
  filter(dat$PCR.treatment=="Placebo")
#datTmp$PCR.PATID<-factor(paste(sep="","PCB",formatC(width = 2, format = "d", flag = "0",as.integer(factor(datTmp$PCR.PATID)))))
datTmp$PCR.PATID<-factor(datTmp$dummyPID)

for(i in 1:length(levels(datTmp$PCR.PATID))){
  pid<-levels(datTmp$PCR.PATID)[i]
  g[[pid]]<-plotqPCRELISATimeHeatmap(datTmp,pid,ctThr=32)
}

grid.arrange(grobs=g,nrow=12,as.table=F,top=textGrob("Threshold Ct = 32", gp=gpar(fontsize=18)))

png(file=paste(sep="",outPrefix,"_SupFig1_HeatMap_Ct32.png"),height=12,width=12,units="in",res=450)
grid.arrange(grobs=g,nrow=12,as.table=F,top=textGrob("Threshold Ct = 32", gp=gpar(fontsize=18)))
dev.off()
## quartz_off_screen 
##                 2

qPCR Ct values vs. qPCR derived log2 oocyst counts

Note that what looks like a single point at log2 oocyst per gram = 9.86, Ct=40 are in fact 4 samples:

CFZ0010025.Visit7, CFZ0010147.Visit4, CFZ0010147.Visit6, CFZ0010179.Visit6.

dat %>%
  #filter(PCRCT.Ct<35) %>%
ggplot(mapping=aes(x=PCR.log2_first,y=PCRCT.Ct,col=PCRCT.Experiment)) +
  geom_point(size=2) +
  #geom_smooth(se=F) +
  #geom_text(data=dat[dat$PCRCT.Ct>35 | (dat$PCRCT.Ct>30 & dat$PCR.log2_first>15) | (dat$PCRCT.Ct<29 & dat$PCR.log2_first<11.5),],mapping=aes(x=PCR.log2_first,y=PCRCT.Ct,label=PCR.Sample.ID),nudge_x=0,nudge_y=0.35) +
  xlab("log2 oocyst per gram") +
  ylab("qPCR Ct value") +
  ggtitle("qPCR CT vs qPCR log2 oocyst per gram (first stool of the day).") +
  scale_color_manual(values=c("steelblue","salmon","orange","cyan","mediumorchid","darkgrey","brown","darkgreen","greenyellow","black"),name="PCR experiment") +
  theme(text=element_text(size=16))

These are all the potentially problematic samples:

datTmp<-dat[!is.na(dat$PCRCT.Ct) & dat$PCRCT.Ct>35,c("PCR.Sample.ID","PCRCT.Lab.Sample.ID","PCR.log2_first","PCRCT.Ct")]
datTmp<-datTmp[order(datTmp$PCRCT.Ct),]
kable(datTmp,row.names=F) %>%
  kable_styling()
PCR.Sample.ID PCRCT.Lab.Sample.ID PCR.log2_first PCRCT.Ct
CFZ0011548.Visit7 CDL127F1 8.17047 35.260
CFZ0011019.Visit3 CDH131F1 7.38882 35.414
CFZ0010010.Visit5 CDJ102F1 8.44481 35.545
CFZ0010147.Visit5 CDJ117F1 5.61746 36.206
CFZ0010147.Visit7 CDL10MF1 5.50515 36.320
CFZ0010147.Visit1 CDA10XF1 5.05775 36.776
CFZ0011086.Visit8 CDM14WF1 8.31257 37.091
CFZ0010293.Visit8 CDM10XF1 3.81738 38.003
CFZ0010025.Visit7 CDL108F1 9.85720 40.000
CFZ0010147.Visit4 CDI10NF1 9.85720 40.000
CFZ0010147.Visit6 CDE10PF1 9.85720 40.000
CFZ0010179.Visit6 CDE11EF1 9.85720 40.000

Screening data analysis

Note:

The table for RDT vs qPCR uses a qPCR threshold of 35 and it was not possible to generate the table for a threshold of 32. qPCR Ct values are only avilable for the enrolled subjects and hence I could not re-compute the qPCR positive/negative result for a new threshold from the data file that was shared with me.

TABLE 1: RDT (rows) vs qPCR (columns):

#table(preScreen$RDT.Result,preScreen$qPCR.Result,useNA="ifany")
tt<-table(preScreen$RDT.Result,preScreen$qPCR.Result,useNA="ifany")

tt<-data.frame(var1="RDT",RDT=rownames(tt),PCRneg=tt[,"negative"],PCRpos=tt[,"positive"],PCRna=tt[,3],row.names = NULL)

kable(tt,caption="",row.names=F,col.names=c("","","negative","positive","NA")) %>%
  kable_styling(full_width = F) %>%
  add_header_above(c(" " = 2, "qPCR" = 3)) %>%
  column_spec(2, bold = T) %>%
  collapse_rows(columns = 1, valign = "middle")
qPCR
negative positive NA
RDT negative 478 54 9
positive 0 21 0
NA 2 0 17

SUPPL. TABLE X: RDT (rows) vs. ELISA (columns):

#table(preScreen$RDT.Result,preScreen$ELISA.Result,useNA="ifany")
tt<-table(preScreen$RDT.Result,preScreen$ELISA.Result,useNA="ifany")

tt<-data.frame(var1="RDT",RDT=rownames(tt),ELISAneg=tt[,"negative"],ELISApos=tt[,"positive"],ELISAna=tt[,3],row.names = NULL)

kable(tt,caption="",row.names=F,col.names=c("","","negative","positive","NA")) %>%
  kable_styling(full_width = F) %>%
  add_header_above(c(" " = 2, "ELISA" = 3)) %>%
  column_spec(2, bold = T) %>%
  collapse_rows(columns = 1, valign = "middle")
ELISA
negative positive NA
RDT negative 14 2 525
positive 2 5 14
NA 0 0 19

Note that this is virtually the same as in Thomas’ presentation, but: * 5 more observations that are both RDT and qPCR negative than in Tom’s table (only 468 of these)

Exit R.

cat(file=logFile,append=T,"This is the end.\n")
## This is the end.